###############################################################################
## AUTHOR: ALAN YAN
## DATE LAST UPDATED: 11/13/2020
## PURPOSE: ANALYZE YOUGOV CONJOINT DATA (TRIMMING OUT LARGE INCOMES)
###############################################################################
rm(list = ls())

#### todo ####


#### PACKAGES ####
library(pacman)
p_load(tidyverse,
       estimatr,
       broom,
       haven,
       hrbrthemes,
       cjoint,
       formula.tools, 
       DeclareDesign,
       emmeans,
       lmtest,
       cregg,
       gridExtra,
       egg,
       car)

#### FUNCTIONS ####

# ggplot theme
theme_shom_alt <- function (base_size = 11, waffle = FALSE) 
{
  ret <- theme_minimal(base_size = base_size, base_family = "Roboto Condensed") + 
    theme(plot.background = element_rect(fill = "#f5f5f2", 
                                         color = NA), 
          panel.background = element_rect(fill = "#f5f5f2",color = NA), 
          legend.background = element_rect(fill = "#f5f5f2",color = NA))
  if (waffle) {
    ret + theme(axis.text = element_blank())
  }
  else {
    ret
  }
}

# lm robust function that includes omitted categories for plotting
lm_robust_omitted <- function(formula, data, clusters,weights = NULL) {
  require(formula.tools)
  require(tidyverse)
  require(haven)
  results <- lm_robust(formula = formula,
                       data = data,
                       clusters = clusters,
                       weights = weights) %>%
    tidy() 
  omitted_cat <- model.frame(formula = formula, data = data) %>% 
    select(2:length(.)) %>% 
    sapply(., levels) %>% 
    lapply(., "[[", 1) %>% 
    unlist() %>%
    as.data.frame()
  omitted_cat_df <- cbind(paste0(rownames(omitted_cat), omitted_cat[,1]), 
                          0, 0, 0, 0, 0, 0, 0, 
                          lhs.vars(formula)) %>%
    as.data.frame() %>%
    zap_labels() %>%
    setNames(., names(results))
  results_with_omitted <- rbind(results, 
                                omitted_cat_df)
  results_with_omitted[,c(2:8)] <- sapply(results_with_omitted[,c(2:8)],as.numeric)
  return(results_with_omitted)
}

#### LOAD DATA ####
df <- read_rds("01-yougov-conjoint/01-data/cleaned/yougov-conjoint-cleaned.rds")

#### ANALYZE DATA (NO CONTROLS) less than 200k ####
#subset out income attributes over 200k
df <- subset(df, df$cje_income %in% sort(unique(df$cje_income))[1:18])
#we lose about ~3/8 of the sample which makes sense because it represents a bit more than 1/3 of potential incomes

#Do people prefer firms with these features?
main_work_td <- lm_robust_omitted(work_binary ~ 
                                    as.factor(cje_corporate_gov) +
                                    as.factor(cje_corporate_resp) +
                                    as.factor(cje_firm_size) +
                                    as.factor(cje_political_donations) + 
                                    as.factor(cje_gender_ownership) + 
                                    as.factor(cje_healthcare) + 
                                    as.factor(cje_hours) + 
                                    as.factor(cje_training) + 
                                    as.factor(cje_location) + 
                                    as.factor(cje_sick_leave) + 
                                    as.factor(cje_parental_leave) + 
                                    as.factor(cje_race_ownership) + 
                                    as.factor(cje_retirement) + 
                                    as.factor(cje_income) +
                                    as.factor(cje_type_of_work) +
                                    as.factor(cje_union) +
                                    as.factor(cje_work_from_home) +
                                    as.factor(cje_team_work) +
                                    as.factor(cje_work_culture),
                                  data = df,
                                  clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = factor(gsub(x = term,pattern = "as.factor",replacement = "")),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#Do people believe some firms are better at dealing with complaints?
main_complaints_td <- lm_robust_omitted(complaints_binary ~ 
                                          as.factor(cje_corporate_gov) +
                                          as.factor(cje_corporate_resp) +
                                          as.factor(cje_firm_size) +
                                          as.factor(cje_political_donations) + 
                                          as.factor(cje_gender_ownership) + 
                                          as.factor(cje_healthcare) + 
                                          as.factor(cje_hours) + 
                                          as.factor(cje_training) + 
                                          as.factor(cje_location) + 
                                          as.factor(cje_sick_leave) + 
                                          as.factor(cje_parental_leave) + 
                                          as.factor(cje_race_ownership) + 
                                          as.factor(cje_retirement) + 
                                          as.factor(cje_income) +
                                          as.factor(cje_type_of_work) +
                                          as.factor(cje_union) +
                                          as.factor(cje_work_from_home) +
                                          as.factor(cje_team_work) +
                                          as.factor(cje_work_culture),
                                        data = df,
                                        clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#Do people believe they'll have more responsibilties at some firms?
main_responsibility_td <- lm_robust_omitted(responsibility_binary ~ 
                                              as.factor(cje_corporate_gov) +
                                              as.factor(cje_corporate_resp) +
                                              as.factor(cje_firm_size) +
                                              as.factor(cje_political_donations) + 
                                              as.factor(cje_gender_ownership) + 
                                              as.factor(cje_healthcare) + 
                                              as.factor(cje_hours) + 
                                              as.factor(cje_training) + 
                                              as.factor(cje_location) + 
                                              as.factor(cje_sick_leave) + 
                                              as.factor(cje_parental_leave) + 
                                              as.factor(cje_race_ownership) + 
                                              as.factor(cje_retirement) + 
                                              as.factor(cje_income) +
                                              as.factor(cje_type_of_work) +
                                              as.factor(cje_union) +
                                              as.factor(cje_work_from_home) +
                                              as.factor(cje_team_work) +
                                              as.factor(cje_work_culture),
                                            data = df,
                                            clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#Do people believe they'll have more power at some firms?
main_power_td <- lm_robust_omitted(power_binary ~ 
                                     as.factor(cje_corporate_gov) +
                                     as.factor(cje_corporate_resp) +
                                     as.factor(cje_firm_size) +
                                     as.factor(cje_political_donations) + 
                                     as.factor(cje_gender_ownership) + 
                                     as.factor(cje_healthcare) + 
                                     as.factor(cje_hours) + 
                                     as.factor(cje_training) + 
                                     as.factor(cje_location) + 
                                     as.factor(cje_sick_leave) + 
                                     as.factor(cje_parental_leave) + 
                                     as.factor(cje_race_ownership) + 
                                     as.factor(cje_retirement) + 
                                     as.factor(cje_income) +
                                     as.factor(cje_type_of_work) +
                                     as.factor(cje_union) +
                                     as.factor(cje_work_from_home) +
                                     as.factor(cje_team_work) +
                                     as.factor(cje_work_culture),
                                   data = df,
                                   clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#### test for differences in effects ####

work_mod <- lm_robust(work_binary ~ cje_corporate_gov,
                      data = df,
                      clusters = df$rid)
summary(work_mod)
linearHypothesis(work_mod, 
                 c("cje_corporate_govPublicly owned by shareholders = cje_corporate_govWorkers are shareholders",
                   "cje_corporate_govPublicly owned by shareholders = cje_corporate_govWorkers sit on the corporate board"))


#### PLOT JUST WORK AND POWER (FIGURE S12) ####

plot_tbl_work_power <- bind_rows(main_work_td,main_power_td) %>%
  mutate(outcome_clean = case_when(
    outcome == "work_binary" ~ "Prefer to Work",
    outcome == "power_binary" ~ "More Power"
  )) %>%
  mutate(outcome_clean = fct_relevel(outcome_clean,"Prefer to Work",
                                     "More Power")) %>%
  mutate(term_clean = gsub(x = term_clean,pattern = "(cje_corporate_gov)",replacement = "",fixed = T)) %>%
  ggplot(aes(x = term_clean,y = estimate)) +
  facet_wrap(~outcome_clean) + 
  geom_linerange(aes(ymin = conf.low.90,ymax = conf.high.90),alpha = 0.7,
                 color = "indianred",size = 3) + #CIs around point estimates
  geom_linerange(aes(ymin = conf.low,ymax = conf.high),alpha = 0.3,
                 color = "indianred",size = 3) + 
  geom_hline(yintercept = 0,linetype = 'dashed',size = 1.5) + 
  geom_point(size = 3,shape = 21,fill = "white") +
  theme_shom_alt(base_size = 16) + 
  theme(plot.caption = element_text(size = 10,hjust = 1),
        plot.caption.position = "plot",
        strip.background = element_rect(fill="grey"),
        strip.text = element_text(color = 'black'),
        legend.position = "bottom",
        strip.text.x = element_text(size = 11),
        strip.text.y = element_text(size = 11)) + 
  labs(x = "Treatment",
       y = "Average Marginal Component Effect Relative to Baseline Condition",
       caption = "Notes: Estimates generated via Ordinary Least Squares with standard errors clustered at the respondent level. 
       The dark shaded region represents the 90% confidence interval while the light shaded region represents the 95% confidence interval.
       An F-test of the equality of codetermination and employee ownership with public ownership for work preference yields a p-value of 0.062.") + 
  coord_flip() + 
  ggsave("01-yougov-conjoint/03-src/output/figures/main-effects-work-power-income-subset.pdf",
         dpi = 320,width = 10,height = 6,device = cairo_pdf)
plot_tbl_work_power

#### ANALYZE DATA (NO CONTROLS) less than 80k ####
#subset out income attributes over 80k
df <- df %>% filter(cje_income %in% sort(unique(df$cje_income))[1:6])
#we lose about ~3/8 of the sample which makes sense because it represents a bit more than 1/3 of potential incomes

#Do people prefer firms with these features?
main_work_td <- lm_robust_omitted(work_binary ~ 
                                    as.factor(cje_corporate_gov) +
                                    as.factor(cje_corporate_resp) +
                                    as.factor(cje_firm_size) +
                                    as.factor(cje_political_donations) + 
                                    as.factor(cje_gender_ownership) + 
                                    as.factor(cje_healthcare) + 
                                    as.factor(cje_hours) + 
                                    as.factor(cje_training) + 
                                    as.factor(cje_location) + 
                                    as.factor(cje_sick_leave) + 
                                    as.factor(cje_parental_leave) + 
                                    as.factor(cje_race_ownership) + 
                                    as.factor(cje_retirement) + 
                                    as.factor(cje_income) +
                                    as.factor(cje_type_of_work) +
                                    as.factor(cje_union) +
                                    as.factor(cje_work_from_home) +
                                    as.factor(cje_team_work) +
                                    as.factor(cje_work_culture),
                                  data = df,
                                  clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = factor(gsub(x = term,pattern = "as.factor",replacement = "")),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#Do people believe some firms are better at dealing with complaints?
main_complaints_td <- lm_robust_omitted(complaints_binary ~ 
                                          as.factor(cje_corporate_gov) +
                                          as.factor(cje_corporate_resp) +
                                          as.factor(cje_firm_size) +
                                          as.factor(cje_political_donations) + 
                                          as.factor(cje_gender_ownership) + 
                                          as.factor(cje_healthcare) + 
                                          as.factor(cje_hours) + 
                                          as.factor(cje_training) + 
                                          as.factor(cje_location) + 
                                          as.factor(cje_sick_leave) + 
                                          as.factor(cje_parental_leave) + 
                                          as.factor(cje_race_ownership) + 
                                          as.factor(cje_retirement) + 
                                          as.factor(cje_income) +
                                          as.factor(cje_type_of_work) +
                                          as.factor(cje_union) +
                                          as.factor(cje_work_from_home) +
                                          as.factor(cje_team_work) +
                                          as.factor(cje_work_culture),
                                        data = df,
                                        clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#Do people believe they'll have more responsibilties at some firms?
main_responsibility_td <- lm_robust_omitted(responsibility_binary ~ 
                                              as.factor(cje_corporate_gov) +
                                              as.factor(cje_corporate_resp) +
                                              as.factor(cje_firm_size) +
                                              as.factor(cje_political_donations) + 
                                              as.factor(cje_gender_ownership) + 
                                              as.factor(cje_healthcare) + 
                                              as.factor(cje_hours) + 
                                              as.factor(cje_training) + 
                                              as.factor(cje_location) + 
                                              as.factor(cje_sick_leave) + 
                                              as.factor(cje_parental_leave) + 
                                              as.factor(cje_race_ownership) + 
                                              as.factor(cje_retirement) + 
                                              as.factor(cje_income) +
                                              as.factor(cje_type_of_work) +
                                              as.factor(cje_union) +
                                              as.factor(cje_work_from_home) +
                                              as.factor(cje_team_work) +
                                              as.factor(cje_work_culture),
                                            data = df,
                                            clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#Do people believe they'll have more power at some firms?
main_power_td <- lm_robust_omitted(power_binary ~ 
                                     as.factor(cje_corporate_gov) +
                                     as.factor(cje_corporate_resp) +
                                     as.factor(cje_firm_size) +
                                     as.factor(cje_political_donations) + 
                                     as.factor(cje_gender_ownership) + 
                                     as.factor(cje_healthcare) + 
                                     as.factor(cje_hours) + 
                                     as.factor(cje_training) + 
                                     as.factor(cje_location) + 
                                     as.factor(cje_sick_leave) + 
                                     as.factor(cje_parental_leave) + 
                                     as.factor(cje_race_ownership) + 
                                     as.factor(cje_retirement) + 
                                     as.factor(cje_income) +
                                     as.factor(cje_type_of_work) +
                                     as.factor(cje_union) +
                                     as.factor(cje_work_from_home) +
                                     as.factor(cje_team_work) +
                                     as.factor(cje_work_culture),
                                   data = df,
                                   clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean))

#### test for differences in effects ####

work_mod <- lm_robust(work_binary ~ cje_corporate_gov,
                      data = df,
                      clusters = df$rid)
summary(work_mod)
linearHypothesis(work_mod, 
                 c("cje_corporate_govPublicly owned by shareholders = cje_corporate_govWorkers are shareholders",
                   "cje_corporate_govPublicly owned by shareholders = cje_corporate_govWorkers sit on the corporate board"))


#### PLOT JUST WORK AND POWER (FIGURE S13) ####

plot_tbl_work_power <- bind_rows(main_work_td,main_power_td) %>%
  mutate(outcome_clean = case_when(
    outcome == "work_binary" ~ "Prefer to Work",
    outcome == "power_binary" ~ "More Power"
  )) %>%
  mutate(outcome_clean = fct_relevel(outcome_clean,"Prefer to Work",
                                     "More Power")) %>%
  mutate(term_clean = gsub(x = term_clean,pattern = "(cje_corporate_gov)",replacement = "",fixed = T)) %>%
  ggplot(aes(x = term_clean,y = estimate)) +
  facet_wrap(~outcome_clean) + 
  geom_linerange(aes(ymin = conf.low.90,ymax = conf.high.90),alpha = 0.7,
                 color = "indianred",size = 3) + #CIs around point estimates
  geom_linerange(aes(ymin = conf.low,ymax = conf.high),alpha = 0.3,
                 color = "indianred",size = 3) + 
  geom_hline(yintercept = 0,linetype = 'dashed',size = 1.5) + 
  geom_point(size = 3,shape = 21,fill = "white") +
  theme_shom_alt(base_size = 16) + 
  theme(plot.caption = element_text(size = 10,hjust = 1),
        plot.caption.position = "plot",
        strip.background = element_rect(fill="grey"),
        strip.text = element_text(color = 'black'),
        legend.position = "bottom",
        strip.text.x = element_text(size = 11),
        strip.text.y = element_text(size = 11)) + 
  labs(x = "Treatment",
       y = "Average Marginal Component Effect Relative to Baseline Condition",
       caption = "Notes: Estimates generated via Ordinary Least Squares with standard errors clustered at the respondent level. 
       The dark shaded region represents the 90% confidence interval while the light shaded region represents the 95% confidence interval.") + 
  coord_flip() 
ggsave(plot_tbl_work_power, "01-yougov-conjoint/03-src/output/figures/main-effects-work-power-income-subset-under80k.pdf",
         dpi = 320, width = 10, height = 6, device = cairo_pdf)
plot_tbl_work_power
